{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import os\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.svm import SVC, LinearSVC\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.linear_model import Perceptron\n",
    "from sklearn.linear_model import SGDClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold, learning_curve\n",
    "from xgboost import XGBClassifier\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack train and test so every feature transform is applied to both at once.\n",
    "# ignore_index gives the combined frame a unique 0..N-1 index; without it the test\n",
    "# rows reuse index labels already taken by the train rows, which makes any\n",
    "# label-based alignment ambiguous. Train rows stay first, test rows follow.\n",
    "train_df = pd.read_csv('train.csv')\n",
    "test_df = pd.read_csv('test.csv')\n",
    "combine_df = pd.concat([train_df, test_df], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Title\n",
    "# Assumes names are formatted 'Surname, Title. Given names': keep the text between\n",
    "# the first ', ' and the following '.'.\n",
    "combine_df['Title'] = combine_df['Name'].apply(lambda name: name.split(', ')[1].split('.')[0])\n",
    "# Collapse rare titles into the common ones. 'Dona' is a female honorific, so it is\n",
    "# grouped with 'Mrs' rather than 'Mr'. 'Dr' is listed once only (under 'Mr'): a second\n",
    "# entry under 'Mrs' could never match because 'Dr' has already been replaced.\n",
    "title_groups = {\n",
    "    'Mr': ['Don', 'Major', 'Capt', 'Jonkheer', 'Rev', 'Col', 'Sir', 'Dr'],\n",
    "    'Miss': ['Mlle', 'Ms'],\n",
    "    'Mrs': ['the Countess', 'Mme', 'Lady', 'Dona'],\n",
    "}\n",
    "for common_title, rare_titles in title_groups.items():\n",
    "    combine_df['Title'] = combine_df['Title'].replace(rare_titles, common_title)\n",
    "# One indicator column per remaining title; the Title column itself is kept for now.\n",
    "title_dummies = pd.get_dummies(combine_df['Title'], prefix='Title')\n",
    "combine_df = pd.concat([combine_df, title_dummies], axis=1)\n",
    "\n",
    "#Name_length\n",
    "# Bucket the character count of each name into five quantile bins.\n",
    "name_lengths = combine_df['Name'].map(len)\n",
    "combine_df['Name_Len'] = pd.qcut(name_lengths, q=5)\n",
    "\n",
    "\n",
    "#Dead_female_family & Survive_male_family\n",
    "# Surname is the text before the first comma of Name.\n",
    "combine_df['Surname'] = combine_df['Name'].map(lambda full_name: full_name.split(',')[0])\n",
    "# Passengers aged 12 or more travelling with at least one relative (Parch or SibSp > 0).\n",
    "adult_with_relatives = (combine_df['Age'] >= 12) & ((combine_df['Parch'] > 0) | (combine_df['SibSp'] > 0))\n",
    "female_died = (combine_df['Sex'] == 'female') & (combine_df['Survived'] == 0)\n",
    "male_survived = (combine_df['Sex'] == 'male') & (combine_df['Survived'] == 1)\n",
    "dead_female_surname = list(set(combine_df.loc[adult_with_relatives & female_died, 'Surname'].values))\n",
    "survive_male_surname = list(set(combine_df.loc[adult_with_relatives & male_survived, 'Surname'].values))\n",
    "# NOTE: both flags are 0 for passengers whose surname IS in the list and 1 otherwise.\n",
    "# NOTE(review): the lists are derived from the Survived label, so scores measured on\n",
    "# the same labelled rows may be optimistic - confirm before trusting them.\n",
    "for flag_name, surnames in (('Dead_female_family', dead_female_surname),\n",
    "                            ('Survive_male_family', survive_male_surname)):\n",
    "    combine_df[flag_name] = np.where(combine_df['Surname'].isin(surnames), 0, 1)\n",
    "combine_df = combine_df.drop(['Name', 'Surname'], axis=1)\n",
    "\n",
    "\n",
    "#Age & isChild\n",
    "# Fill missing ages with the median age of passengers sharing the same Title and Pclass.\n",
    "age_by_group = combine_df.groupby(['Title', 'Pclass'])['Age']\n",
    "combine_df['Age'] = age_by_group.transform(lambda ages: ages.fillna(ages.median()))\n",
    "combine_df['IsChild'] = np.where(combine_df['Age'] <= 12, 1, 0)\n",
    "# Only the IsChild flag is kept as a feature. Binning Age with pd.cut right before\n",
    "# dropping the column had no effect on the result, so Age is dropped directly,\n",
    "# together with the helper Title column.\n",
    "combine_df = combine_df.drop(['Title', 'Age'], axis=1)\n",
    "\n",
    "#ticket\n",
    "# The first character of the ticket string drives two binary flags.\n",
    "# NOTE(review): the prefix lists are hard-coded; presumably picked from observed\n",
    "# survival rates per prefix - confirm.\n",
    "ticket_lett = combine_df['Ticket'].map(lambda ticket: str(ticket)[0])\n",
    "high_survival_prefixes = ['1', '2', 'P']\n",
    "low_survival_prefixes = ['A', 'W', '3', '7']\n",
    "combine_df['High_Survival_Ticket'] = np.where(ticket_lett.isin(high_survival_prefixes), 1, 0)\n",
    "combine_df['Low_Survival_Ticket'] = np.where(ticket_lett.isin(low_survival_prefixes), 1, 0)\n",
    "combine_df = combine_df.drop('Ticket', axis=1)\n",
    "\n",
    "#Embarked\n",
    "# Missing ports are filled with 'S', then the column is replaced by indicator columns.\n",
    "embarked_filled = combine_df['Embarked'].fillna('S')\n",
    "embarked_dummies = pd.get_dummies(embarked_filled, prefix='Embarked')\n",
    "combine_df = pd.concat([combine_df.drop('Embarked', axis=1), embarked_dummies], axis=1)\n",
    "\n",
    "#FamilySize\n",
    "# Relatives aboard = SibSp + Parch: 0 -> 'Alone', up to 3 -> 'Small', anything else -> 'Big'.\n",
    "relatives = combine_df['SibSp'] + combine_df['Parch']\n",
    "combine_df['FamilySize'] = np.select([relatives == 0, relatives <= 3], ['Alone', 'Small'], default='Big')\n",
    "family_dummies = pd.get_dummies(combine_df['FamilySize'], prefix='FamilySize')\n",
    "combine_df = pd.concat([combine_df, family_dummies], axis=1)\n",
    "combine_df = combine_df.drop(['SibSp', 'Parch', 'FamilySize'], axis=1)\n",
    "\n",
    "\n",
    "#Cabin\n",
    "# Despite its name, Cabin_isNull is 1 when a cabin IS recorded and 0 when it is missing.\n",
    "has_cabin = combine_df['Cabin'].notnull()\n",
    "combine_df['Cabin_isNull'] = np.where(has_cabin, 1, 0)\n",
    "combine_df = combine_df.drop('Cabin', axis=1)\n",
    "\n",
    "#PClass & Sex\n",
    "# Swap each categorical column for its indicator columns, Pclass first and then Sex.\n",
    "for column in ['Pclass', 'Sex']:\n",
    "    dummies = pd.get_dummies(combine_df[column], prefix=column)\n",
    "    combine_df = pd.concat([combine_df, dummies], axis=1).drop(column, axis=1)\n",
    "\n",
    "#Fare\n",
    "# Assign the filled column back instead of calling fillna(inplace=True) on the\n",
    "# combine_df['Fare'] selection: the in-place form acts on an intermediate object and\n",
    "# does not update combine_df under pandas copy-on-write. median() already skips NaN.\n",
    "combine_df['Fare'] = combine_df['Fare'].fillna(combine_df['Fare'].median())\n",
    "# NOTE(review): the 8.662 and 26 cut-offs are hard-coded; presumably taken from the\n",
    "# fare distribution - confirm.\n",
    "combine_df['Low_Fare'] = np.where(combine_df['Fare'] <= 8.662, 1, 0)\n",
    "combine_df['High_Fare'] = np.where(combine_df['Fare'] >= 26, 1, 0)\n",
    "combine_df = combine_df.drop('Fare', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Integer-encode every feature column; PassengerId and Survived are left untouched.\n",
    "features = combine_df.columns.drop([\"PassengerId\", \"Survived\"])\n",
    "le = LabelEncoder()\n",
    "for feature in features:\n",
    "    # The shared encoder is refitted on each column, so codes are per-column.\n",
    "    combine_df[feature] = le.fit_transform(combine_df[feature])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAb4AAAEWCAYAAAAZwvJqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xu8HuO5//HPN6EiEnFI2NQhLXEI2jiroo67ztKqs25RrVJtERRlt3HoRkvRbkooUURDi+anWoqsX5xZOYszSepYCRJJhCLX/mPuxWN5nrVmJes5rMz3/XrNa80zc8/MNfdK1vXc98zco4jAzMysKLrVOwAzM7NacuIzM7NCceIzM7NCceIzM7NCceIzM7NCceIzM7NCceIzKwhJIWndNH+FpP/OU3YRjnOYpLsXNU6zapOf4zOrTNKhwFBgA2AuMBH4RUQ8UNfAFoGkAAZExPOdVVZSf2AasHREfNgZcZpVm1t8ZhVIGgpcAvwPsCqwFnA5sF+F8kvVLjrrbP79FYcTn1kZkvoAZwPHRcStETE/Ij6IiP8XEaekMsMk/UnSDZLeAYZIWkbSJZJeTdMlkpZJ5ftKukPSbElvSbpfUre07lRJr0iaK+kZSbuUiWlrSa9L6l6y7BuSJqf5rSQ9nPb/mqT/lfS5Cuc3QtK5JZ9PSdu8Kuk7rcruJWmCpHckvSRpWMnqsennbEnzJH1F0hBJD5Rsv62kxyXNST+3LVnXJOkcSQ+mc79bUt8KMbdVf2tKulXSTElvSvrftLybpDMlzZD0hqQ/pN8tkvqnLt2jJP0TuC8t30bSQ+k4kyTtWBLDEEkvplinSTqsXKzW4CLCkydPrSZgd+BDYKk2ygwDPgAGk32JXJYsWT4CrAL0Ax4CzknlzwOuAJZO0/aAgPWBl4DVU7n+wDoVjvkCsFvJ51uA09L85sA2wFJpH08BJ5SUDWDdND8COLfkXP8FbAwsB4xsVXZHYJN0jl9KZQeXxBql9QQMAR5I8ysBbwPfTnEdkj6vnNY3pXNaL9VfE3B+hXOvVH/dgUnAxSn+HsB2aZvvAM8DXwR6AbcC17eK/Q9pu2WBzwNvAnum890tfe6XyrwDrJ+2Xw3YqN7/Vj11fHKLz6y8lYFZ0f51q4cj4vaIWBgRC4DDgLMj4o2ImAmcRfZHH7IkuRqwdmStx/sj+wv6EbAMMFDS0hExPSJeqHC8m8iSB5J6k/2BvgkgIsZFxCMR8WFETAeuBL6W41wPBK6NiCciYj5ZQv9YRDRFxJR0jpPT8fLsF2Av4LmIuD7FdRPwNLBPSZlrI+LZVH83A4Mq7KtS/W0FrA6cElnL/L345BrsYcCvI+LFiJgHnA4c3Kpbc1jabgFwOHBnRNyZzvcfQDNZPQMsBDaWtGxEvBYRU3PWgzUQJz6z8t4E+ua47vNSq8+rAzNKPs9IywB+Rdb6uDt1l50GENkNJCeQJZw3JP1R0uqUNxL4Zuo+/SYwPiJmAEhaL3UFvp66Xv8HKNttWCbm0vMojb+li3VM6kacAxyTc78t+57RatkMspZVi9dL5t8la5mVU7b+gDWBGRW+pJT7fSxFds22Rem5rw0ckLo5Z0uaDWwHrJa+FBxEdv6vSfqrpA0qxGoNzInPrLyHgffJujHb0vq26FfJ/ni2WCstIyLmRsRJEfFFYF9gaMu1vIgYGRHbpW0DuKDswSKeJPvjvQdwKFkibPE7stbUgIhYHvgpWVdge14jSx6lMZcaCYwG1oyIPmTdjS37be+28Nb10bL/V3LE9Slt1N9LwFoVvqSU+318SNZd+/GuS+ZfIusKXaFkWi4izk8x3BURu5G1PJ8GruroeVj9OfGZlRERc4CfAZdJGiypp6SlJe0h6ZdtbHoTcKakfukmjZ8BNwBI2lvSupIEzCHr4lwoaX1JO6dW3HvAArIutUpGAscDO5Bd42vRm+wa1LzUEjk25+ne
THZjzkBJPYGft1rfG3grIt6TtBVZwm0xM8X6xQr7vhNYT9KhkpaSdBAwELgjZ2wfq1R/wGNkyft8SctJ6iHpq2mzm4ATJX1BUi+yVvCoNrqwbwD2kfR1Sd3TvnaUtIakVSXtJ2k5si9F82j792QNyonPrIKIuIjsGb4zyf7AvwT8ELi9jc3OJbsmNBmYAoxPywAGAPeQ/cF8GLg8IsaQXd87H5hF1u23Ctm1qEparrHdFxGzSpafTJaU5pK1REblPM+/kT22cR9ZV+J9rYr8ADhb0lyyRH5zybbvAr8AHkxdg9u02vebwN7ASWTdxz8B9m4Vd15l6y8iPiK7Zrgu8E/gZbIuSYBrgOvJ7j6dRvbF4keVDhARL5E9rvJTPvmdn0L2t7Ib2b+HV4G3yH4Heb9cWAPxA+xmZlYobvGZmVmhOPGZmVmhOPGZmVmhOPGZmVmheFDWBtS3b9/o379/vcMwM+tSxo0bNysi+rVXzomvAfXv35/m5uZ6h2Fm1qVIaj1KUFnu6jQzs0Jxi68BzZw/j989Prb9gmZmDeLYLXeodwi5ucVnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sTXSSTNK7NsmKRXJE2U9KSkQ+oRm5mZfcKJr/oujohBwH7AlZKWrndAZmZF5rE6ayQinpP0LrAi8Ea94zEzy+PiY47PVW5U7z6599nU1LSI0XQOt/hqRNJmwHMRUTbpSTpaUrOk5nmzZ9c4OjOz4nCLr/pOlHQksB6wT6VCETEcGA6w9oYbRI1iMzNr04lXXJqrnN/OYKUujoiNgP2B30vqUe+AzMyKzImvRiJiNNAMHFHvWMzMisyJr/P0lPRyyTS0TJmzgaGSXO9mZnXia3ydJCLaTWYRMQ5YvwbhmJlZBW55mJlZoTjxmZlZoTjxmZlZoTjxmZlZoTjxmZlZoTjxmZlZofhxhgbUb7leXWr4HzOzrsQtPjMzKxQnPjMzKxQnPjMzKxQnPjMzKxQnPjMzKxQnPjMzKxQ/ztCAFi6cy4IF99Y7DDOzdi277C71DqHD3OIzM7NCceIzM7NCceIzM7NCceIzM7NCceIzM7NCceIzM7NCceIzM7NCceIzM7NCceLrAElnSJoqabKkiZJ+Lum8VmUGSXoqzfeSdKWkFySNk9Qkaev6RG9mZuCRW3KT9BVgb2CziHhfUl9gIDACOL2k6MHATWn+amAaMCAiFkr6QtrGzMzqxIkvv9WAWRHxPkBEzALGSnpb0tYR8WgqdyDwdUnrAFsDh0XEwrTNNLJEaGbWsL7+9aG5y3brtmKH9t3U1NTBaDqfuzrzuxtYU9Kzki6X9LW0/CayVh6StgHeiojngI2AiRHxUZ6dSzpaUrOk5lmzZlcjfjMzwy2+3CJinqTNge2BnYBRkk4DRgEPSTqJT3dzdnT/w4HhAJtttn50TtRmZh13112/zl22Kw5S7cTXAan11gQ0SZoCHBERIyRNA74G7A98JRWfCnxZUve8rT4zM6s+d3XmJGl9SQNKFg0CZqT5m4CLgRcj4mWAiHgBaAbOkqS0j/6S9qph2GZm1ooTX369gOskPSlpMtndmcPSulvIrum17ub8LrAq8LykJ8juAH2jJtGamVlZ7urMKSLGAdtWWDcLWLrM8neA71U5NDMz6wC3+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFD8OEMD6tatd5ccBsjMrCtwi8/MzArFic/MzArFic/MzArFic/MzArFic/MzArFic/MzArFjzM0oHlz32PsmKfqHYaZ2cd22GnDeofQadziMzOzQnHiMzOzQnHiMzOzQnHiMzOzQnHiMzOzQnHiMzOzQnHiMzOzQnHiMzOzQnHiy0HSvJL5PSU9K2ltScMkvStplQplQ9JFJZ9PljSsZoGbmdlnOPF1gKRdgN8Ae0TEjLR4FnBShU3eB74pqW8t4jMz
s/Z5yLKcJO0AXAXsGREvlKy6Bhgi6YKIeKvVZh8Cw4ETgTNqE6mZWccdf+IRba7vs0LPdvfR1NTUSdFUl1t8+SwD3A4MjoinW62bR5b8jq+w7WXAYZL6tHUASUdLapbUPHtO6/xpZmadxS2+fD4AHgKOonyC+w0wUdKFrVdExDuS/gD8GFhQ6QARMZysdcgG628cnRG0mVlel158XZvrPUh18SwEDgS2kvTT1isjYjYwEjiuwvaXkCXN5aoWoZmZ5eLEl1NEvAvsRdZteVSZIr8Gvk+ZVnS69nczWfIzM7M6cuLrgJTAdgfOlLRvq3WzgNvIrgeWcxHguzvNzOrM1/hyiIheJfMvAV9IH0e3KjcUGFphu38B7d8WZWZmVeUWn5mZFYoTn5mZFYoTn5mZFUqua3ySVgTWLC0fEeOrFZSZmVm1tJv4JJ0DDAFeAFoerA5g5+qFZWZmVh15WnwHAutExL+rHYyZmVm15Ul8TwArAG9UORZLevXusUQND2Rm1kjyJL7zgAmSniB7zQ4AEbFv5U3MzMwaU57Edx1wATCFbMxKMzOzLitP4ns3In5T9UjMzMxqIE/iu1/SeWTDc5V2dfpxBjMz63LyJL5N089tSpb5cQYzM+uS2k18EbFTLQIxMzOrhTwPsC8D7A/059Mjt5xdvbCK7cN/vcbMi8+tdxhmZm3qd+KZ9Q5hkeTp6vwLMAcYR8k1PjMzs64oT+JbIyJ2r3okZmZmNZDn7QwPSdqk6pGYmZnVQMUWn6QpZHdvLgUcKelFsq5OARERX6pNiGZmZp2nra7OvWsWhZmZWY1UTHwRMQNA0vUR8e3SdZKuB75ddkMzM7MGluca30alHyR1BzavTjhmZmbVVTHxSTpd0lzgS5LeSdNcstcT/aVmEZqZmXWiiokvIs6LiN7AryJi+TT1joiVI+L0GsZoZmbWadpq8W2QZm+RtFnrqbMCkDRM0smLsN1D7ay/U9IKix7ZZ/Y3RNLqnbU/MzOrj7bu6hwKHA1cVGZd3Qepjoht21m/ZycfcgjZ2+hfbb1CUveI+KiTj2dmVnODL/t97rJL/+We3GWbmpoWIZrqaKur82hJ3YAzI2KnVtNiJT1JZ0h6VtIDwPpp2TqS/i5pnKT7W1qcklaVdJukSWnaNi2fl36uJmmspImSnpC0fVo+XVLfND80rXtC0glpWX9JT0m6StJUSXdLWrZCvN8CtgBuTMdZNu3/AknjgQPaiL+fpD9LejxNX61wjKMlNUtqfnP+/MWpXjMza4Miou0C0oSI2LTNQh05oLQ5MALYmqzFOR64AtgDOCYinpO0NXBeROwsaRTwcERcku4o7RURcyTNi4hekk4CekTEL9L6nhExV9J0smS1djreNmQP3z8KHA68DTwPbBEREyXdDIyOiBsqxN0EnBwRzenzdODyiPhl+nxvhfhHpnIPSFoLuCsiNmyrjgat+fn4x9BjO1q1ZmY11WiDVEsaFxFbtFcuz1id90raH7g12suS+WwP3BYR7wJIGg30ALYlu57YUm6Z9HNn4L8AUnfinFb7exy4RtLSwO0RMbHV+u3S8ean492aYhgNTCspP47sDRQdMSrts1cb8e8KDCxZvrykXhExr4PHMjOzTpAn8X2f7HrfR5IW8MmQZct3YhzdgNkRMaijG0bEWEk7AHsBIyT9OiL+kHPz0rdNfASU7epsQ0ufZFvxdwO2iYj3OrhvMzOrgnYfYE+PMHSLiKVLHmlYnKQ3FhicrpP1BvYB3gWmSToAQJkvp/L3Asem5d0l9SndmaS1gX9FxFXA1UDrO07vT8frKWk54BtpWUfNBXqXWxER77QR/93Aj0ri7XByNzOzzpNn5BYk7SvpwjQt1hieETGerItwEvA3sq5KgMOAoyRNAqYC+6XlxwM7KRs0exwwsNUudwQmSZoAHARcWuZ4I4DHyK7vXR0RExYh9BHAFS03t5RZXyn+HwNbSJos6UngmEU4tpmZdZI8N7ecD2wJ
3JgWHQI0+yH26vHNLWbWFSzJN7fsCQyKiIVpx9cBEwAnPjMz63LyJD6AFYC30nyftgp2dZIuA1o/a3dpRFxbj3jMzKxz5Ul85wETJI0hu6NzB+C0qkZVRxFxXL1jMDOz6mk38UXETenh7S3TolMj4vWqRmVmZlYl7Sa+kgGpX04/V0+PBcyIiA+rFpmZmVkV5OnqvJzs2bjJZF2dG5Pdrt9H0rERcXcV4zMzM+tUeRLfq8BRETEVQNJA4GzgJ8CtZA9oWydaatXVGu42YTOzJUWeB9jXa0l6ABHxJLBBRLxYvbDMzMyqI0+Lb6qk3wF/TJ8PAp6UtAzwQdUiMzMzq4I8Lb4hZK/vOSFNL6ZlHwA7VSswMzOzasjzOMMCsrewl3sTu1+tY2ZmXUrFxJcGha40kGdExJcrrDMzM2tYbbX4yr2FQcCaeJzOqnp99nx+dftj9Q7DzKyiUwZvVe8QFlnFxBcRM1rmJW0KHAocAEwD/lz90MzMzDpfW12d65G9gugQYBbZO/QUEb6hxczMuqy2ujqfJntT+d4R8TyApBNrEpWZmVmVtPU4wzeB14Axkq6StAvZNT4zM7Muq2Lii4jbI+JgYANgDNkzfKtI+p2k/6xVgGZmZp2p3QfYI2J+RIyMiH2ANcjevn5q1SMzMzOrgjwjt3wsIt6OiOERsUu1AjIzM6umDiU+MzOzrs6Jz8zMCqXLJD5J0yX1rdK+l5F0j6SJkg6q0jEGSdqzGvs2M7P88ryWaLFJEtnD7wtrcbxFsClARAzKu4Gk7hHxUQeOMQjYArizg7GZmVXVFWce2+Ft/npJ7w6Vb2pq6vAxqqVqLT5J/SU9I+kPwBPA7yU1S5oq6aySctMlnSVpvKQpkjZIy1eWdHcqfzUlzxBKGirpiTSdUHK8pyWNkPSspBsl7SrpQUnPSSo7sJykVYAbgC1Ti28dSbtImpDiuSa9e7Al1gskjQcOSGX/LmmcpPtLYj8gxTZJ0lhJnyN7a/1BlVqVko5O9dM8/53ZnfNLMDOzz1BEpRcwLOaOpf5k7+7bNiIekbRSRLwlqTtwL/DjiJgsaTpwUUT8VtIPgM0i4ruSfgPMioizJe0F3AH0A9YGRgDbkCXDR4HDgbfJ3hu4KTAVeByYBBwF7AscGRGDK8S6I3ByROwtqQfwHLBLRDybEvf4iLgkxXp5RPwybXcvcExEPCdpa+C8iNg5vdli94h4RdIKETFb0hBgi4j4YXt1t8a6G8bxF16Xt6rNzGquEQepljQuIrZor1y1r/HNiIhH0vyBqaU0AdgIGFhS7tb0cxzQP83vQNYSIyL+SpbYALYDbkvPF85L226f1k2LiCmpS3UqcG9kmX1KyX7bs37az7Pp83UplhajACT1ArYFbpE0EbgSWC2VeRAYIel7QPecxzUzsxqo9jW++QCSvgCcDGwZEW9LGgH0KCn3fvr50WLG9H7J/MKSzwsXc7+l5qef3YDZ5a4LRsQxqQW4FzBO0uaddGwzM1tMtbqrc3myhDFH0qrAHjm2GUv2KiQk7QGsmJbfDwyW1FPScsA30rLO8gzQX9K66fO3gf/fulBEvANMk3RAilGSvpzm14mIRyPiZ8BMsncYzgU6djXYzMw6XU0SX0RMIuvifBoYSdYV2J6zgB0kTSUbMPufaV/jya7xPUZ2fe/qiJjQibG+BxxJ1oU5hay1eEWF4ocBR0maRNa1ul9a/qt0Y8wTwENk1xrHAAOr+ciEmZm1r2o3t9ii880tZtbofHOLmZlZF1GTB9gbhaQjgeNbLX4wIo6rRzxmZlZ7hUp8EXEtcG294zAzs/pxV6eZmRWKE5+ZmRWKE5+ZmRVKoa7xdRX/scJyDXmrsJnZksAtPjMzKxQnPjMzKxQnPjMzKxQnPjMzKxQnPjMzKxTf1dmAPpzzMjPv+Em9wzAzW2T99v5lvUOoyC0+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFCc+MzMrFCc+
MzMrFCe+EpJWlTRS0ouSxkl6WNI3JO0oaY6kiZImS7pH0ippmyGSZkqaIOk5SXdJ2jatuyxt86SkBWl+oqRv1fdMzcyKy4kvkSTgdmBsRHwxIjYHDgbWSEXuj4hBEfEl4HHguJLNR0XEphExADgfuFXShhFxXEQMAvYEXkjbD4qIP9XuzMzMrJSHLPvEzsC/I+KKlgURMQP4raQdW5alBNkbeL7cTiJijKThwNHAiVWN2Mysxgaf/sdc5Za+8LFc5ZqamhYjmkXjFt8nNgLGt7F+e0kTgX8CuwLXtFF2PLBBRw4u6WhJzZKa35yzoCObmplZB7jFV4Gky4DtgH8Dp5B1de6d1p0K/BI4ptLmHT1eRAwHhgMMGvAfsSgxm5lV2+3nHZyrnAep7hqmApu1fIiI44BdgH5lyo4GdmhjX5sCT3VqdGZm1imc+D5xH9BD0rEly3pWKLsd8EK5FZK+RnZ976rODc/MzDqDuzqTiAhJg4GLJf0EmAnMB05NRVqu8QmYA3y3ZPODJG1HliinAftHhFt8ZmYNyImvRES8RvYIQzl9KmwzAhjRzn6nAxsvRmhmZtZJ3NVpZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4sRnZmaF4uf4GtBSfdZo6HHuzMy6Mrf4zMysUJz4zMysUJz4zMysUJz4zMysUJz4zMysUHxXZwOa/e4r3D7x9HqHYWa2WAYPOq/eIZTlFp+ZmRWKE5+ZmRWKE5+ZmRWKE5+ZmRWKE5+ZmRWKE5+ZmRWKE5+ZmRWKE5+ZmRVKoRKfpOmS+lZYd7ikyZKmSpok6WpJK6R1TZKekTRR0lOSjm61zylpelLSuZJ6SNoklZ8o6S1J09L8PbU6XzMz+yyP3AJI2h04EdgjIl6R1B04AlgVmJ2KHRYRzZJWAl6QNCIi/p3W7RQRsyT1AoYDV0bEEcCgtP8RwB0R8acanpaZmZXRpROfpC2B3wNbAd2Bx4BDgGOAnYGXgA+Aa0qSzk8k7QEsAA6NiOeBM4CTI+IVgIj4CLimwmF7AfOBj1qviIh5ko4BXpK0UkS81TlnamZWX2d+98YOb3NJr4c7VL6pqanDx1gUXTrxRcTjkkYD5wLLAjcA6wH9gYHAKsBTfDqJzYmITST9F3AJsDewETC+ncPdKOl9YABwQkqO5WJ6R9K0VO7RvOeSuk+PBui32vJ5NzMzsw7q0okvORt4HHgP+DFwEXBLRCwEXpc0plX5m0p+Xtx6Z5I2Aa4HegM/jYhRaVVLV2c/4CFJf4+IGRViUkdPIiKGk3WTsu7A1aKj25uZVdO5Vx/W4W08SHX1rEzW/dgb6JGjfJSZnwpsBhARUyJiEPA3slbkpzeOmEnWOty63M4l9SZrcT6bL3wzM6ulJSHxXQn8N3AjcAHwILC/pG6SVgV2bFX+oJKfLR3Q5wEXSlqjpNxnkh6ApJ7ApsALZdb1Ai4Hbo+ItxfpbMzMrKq6dFdnuk73QUSMTHdiPgTcCrwMPEl2c8t4YE7JZitKmgy8T3YjDBFxZ+rC/Fvaz2zgCeCuku1ulLQAWAYYERHjStaNkSSyLxK3Aed0/tmamVlnUMSSdzlJUq90h+XKZHd6fjUiXq93XHmtO3C1uHDkkHqHYWa2WGp9jU/SuIjYor1yXbrF14Y70sPnnwPO6UpJz8zMqmuJTHwRsWO9YzAzs8a0JNzcYmZmlpsTn5mZFYoTn5mZFYoTn5mZFYoTn5mZFcoSeVdnV7dCz8837Bh3ZmZdnVt8ZmZWKE58ZmZWKEvkkGVdnaS5wDP1jqPB9AVm1TuIBuM6Kc/18llFqZO1I6Jfe4V8ja8xPZNnvLkikdTsOvk010l5rpfPcp18mrs6zcysUJz4zMysUJz4GtPwegfQgFwnn+U6Kc/18lmukxK+ucXMzArFLT4zMysUJz4zMysUJ746krS7pGckPS/ptDLrl5E0Kq1/VFL/2kdZWznqZKikJyVNlnSvpLXrEWct
tVcnJeX2lxSSlvjb1vPUiaQD07+VqZJG1jrGesjx/2ctSWMkTUj/h/asR5x1FxGe6jAB3YEXgC8CnwMmAQNblfkBcEWaPxgYVe+4G6BOdgJ6pvljXScfl+sNjAUeAbaod9z1rhNgADABWDF9XqXecTdIvQwHjk3zA4Hp9Y67HpNbfPWzFfB8RLwYEf8G/gjs16rMfsB1af5PwC6SVMMYa63dOomIMRHxbvr4CLBGjWOstTz/TgDOAS4A3qtlcHWSp06+B1wWEW8DRMQbNY6xHvLUSwDLp/k+wKs1jK9hOPHVz+eBl0o+v5yWlS0TER8Cc4CVaxJdfeSpk1JHAX+rakT1126dSNoMWDMi/lrLwOooz7+T9YD1JD0o6RFJu9csuvrJUy/DgMMlvQzcCfyoNqE1Fg9ZZl2SpMOBLYCv1TuWepLUDfg1MKTOoTSapci6O3ck6xUYK2mTiJhd16jq7xBgRERcJOkrwPWSNo6IhfUOrJbc4qufV4A1Sz6vkZaVLSNpKbKuiTdrEl195KkTJO0KnAHsGxHv1yi2emmvTnoDGwNNkqYD2wCjl/AbXPL8O3kZGB0RH0TENOBZskS4JMtTL0cBNwNExMNAD7IBrAvFia9+HgcGSPqCpM+R3bwyulWZ0cARaf5bwH2RrkovodqtE0mbAleSJb0iXLdps04iYk5E9I2I/hHRn+y6574R0VyfcGsiz/+d28lae0jqS9b1+WItg6yDPPXyT2AXAEkbkiW+mTWNsgE48dVJumb3Q+Au4Cng5oiYKulsSfumYr8HVpb0PDAUqHgr+5IgZ538CugF3CJpoqTW/7GXKDnrpFBy1sldwJuSngTGAKdExJLcW5K3Xk4CvidpEnATMGQJ/zJdlocsMzOzQnGLz8zMCsWJz8zMCsWJz8zMCsWJz8zMCsWJz8zMCsWJz6xBpTct3FDyeSlJMyXdUYNjtxzr/Gofy6zWnPjMGtd8YGNJy6bPu1FmJJsq2Y1stJMDqjkwehqRyKymnPjMGtudwF5p/hCyh44BkLScpGskPZber7ZfWt5f0v2Sxqdp27R8R0lNkv4k6WlJN7aR1A4BLiUb6eMrJcfcUtJDkial4/aW1F3ShZKeSO94+1EqOz2NmoKkLSQ1pfmw44HVAAACiklEQVRhkq6X9CDZWJFl401lT5U0JR3vfEnrSBpfsn5A6WezPPxty6yx/RH4Were/BJwDbB9WncG2TB235G0AvCYpHuAN4DdIuI9SQPIkmXL2J2bAhuRvY7mQeCrwAOlB5TUA9gV+D6wAlkSfCgNgzUKOCgiHpe0PLAAOBroDwyKiA8lrZTjvAYC20XEAkk9y8UraQ+y1+psHRHvSlopIt6SNEfSoIiYCBwJXJu/Os3c4jNraBExmSypHELW+iv1n8BpkiYCTWTjLq4FLA1cJWkKcAtZkmnxWES8nEbjn5j23drewJiIWAD8GRgsqTuwPvBaRDyeYnsnDZO1K3Blmici3spxaqPT/mkj3l2Ba1vev1iy36uBI1NMBwGFeLu6dR63+Mwa32jgQrJBl0vfxyhg/4h4prSwpGHAv4Avk325LX05benbLD6i/N+AQ4Dt0tseSMfcGXi9g3F/yCdfrnu0Wje/ZP7ENuIt58/Az4H7gHFL+hic1vnc4jNrfNcAZ0XElFbL7wJ+1HKdLr25ArLXV72WWnXfBrrnPVDqvtweWKvkjQ/HkSXDZ4DVJG2ZyvZON6f8A/h+y40qJV2d04HN0/z+bRy2Urz/IGvZ9Szdb0S8l879d7ib0xaBE59Zg0tdk78ps+ocsm7CyZKmps8AlwNHpBH4N+DTrav2fIPsumFpy/AvwD5kLcyDgN+mff+DrCV3NdlNMJPT8kPTdmcBl0pqJmtdVlI23oj4O1lrtzl1555css2NwELg7g6cmxngtzOYWRck6WSgT0T8d71jsa7H1/jMrEuRdBuwDtl1R7MOc4vPzMwKxdf4zMysUJz4zMysUJz4zMysUJz4
zMysUJz4zMysUP4PNn667wP9AQYAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f7d6c9dc6a0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Rows that came from train.csv sit first in combine_df; split on the actual train\n",
    "# size instead of a hard-coded row count.\n",
    "n_train = len(train_df)\n",
    "feature_df = combine_df.drop([\"PassengerId\", \"Survived\"], axis=1)\n",
    "X_all = feature_df.iloc[:n_train, :]\n",
    "Y_all = combine_df[\"Survived\"].iloc[:n_train]\n",
    "X_test = feature_df.iloc[n_train:, :]\n",
    "\n",
    "# Candidate models; `ag` below lists their display names in the same order as `clfs`.\n",
    "logreg = LogisticRegression()\n",
    "svc = SVC()\n",
    "knn = KNeighborsClassifier(n_neighbors = 3)\n",
    "decision_tree = DecisionTreeClassifier()\n",
    "random_forest = RandomForestClassifier(n_estimators=300,min_samples_leaf=4,class_weight={0:0.745,1:0.255})\n",
    "gbdt = GradientBoostingClassifier(n_estimators=500,learning_rate=0.03,max_depth=3)\n",
    "xgb = XGBClassifier(max_depth=3, n_estimators=300, learning_rate=0.03)\n",
    "clfs = [logreg, svc, knn, decision_tree, random_forest, gbdt, xgb]\n",
    "\n",
    "# Cross-validated accuracy (kfold folds) for every candidate model.\n",
    "kfold = 10\n",
    "cv_results = [\n",
    "    cross_val_score(classifier, X_all.values, y=Y_all.values, scoring=\"accuracy\", cv=kfold, n_jobs=4)\n",
    "    for classifier in clfs\n",
    "]\n",
    "cv_means = [cv_result.mean() for cv_result in cv_results]\n",
    "cv_std = [cv_result.std() for cv_result in cv_results]\n",
    "\n",
    "ag = [\"LR\",\"SVC\",'KNN','decision_tree',\"random_forest\",\"GBDT\",\"xgbGBDT\"]\n",
    "cv_res = pd.DataFrame({\"CrossValMeans\": cv_means, \"CrossValerrors\": cv_std, \"Algorithm\": ag})\n",
    "\n",
    "# x and y are passed by keyword: recent seaborn releases no longer accept them positionally.\n",
    "g = sns.barplot(x=\"CrossValMeans\", y=\"Algorithm\", data=cv_res, palette=\"Set3\", orient=\"h\", xerr=cv_std)\n",
    "g.set_xlabel(\"Mean Accuracy\")\n",
    "g = g.set_title(\"Cross validation scores\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LR 0.8730793893996142\n",
      "SVC 0.8674489274770174\n",
      "KNN 0.8506829531267733\n",
      "decision_tree 0.8629670298490524\n",
      "random_forest 0.8598098967200091\n",
      "GBDT 0.8843408807172851\n",
      "xgbGBDT 0.8854267393031439\n"
     ]
    }
   ],
   "source": [
    "# Pair each label with its mean CV accuracy instead of indexing with a hard-coded count,\n",
    "# so the report keeps working if the list of models changes.\n",
    "for name, mean_accuracy in zip(ag, cv_means):\n",
    "    print(name, mean_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import precision_score\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "\n",
    "class Bagging(object):\n",
    "    \"\"\"Two-level stacked ensemble (despite the name, this is stacking, not bagging).\n",
    "\n",
    "    The base estimators produce class predictions and a LogisticRegression\n",
    "    meta-classifier (``self.clf``) learns how to combine them.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    estimators : sequence of (name, estimator) pairs\n",
    "        Base classifiers; ``fit`` and ``predict`` are called on each of them.\n",
    "    cv : int, cross-validation splitter or None, default 5\n",
    "        Passed to ``cross_val_predict`` to build out-of-fold base predictions\n",
    "        for training the meta-classifier. With ``None`` the meta-classifier is\n",
    "        trained on in-sample predictions instead, i.e. on predictions the base\n",
    "        models made for rows they were fitted on.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``estimators`` is empty.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, estimators, cv=5):\n",
    "        estimators = list(estimators)\n",
    "        if not estimators:\n",
    "            raise ValueError('Bagging needs at least one (name, estimator) pair')\n",
    "        self.estimator_names = [pair[0] for pair in estimators]\n",
    "        self.estimators = [pair[1] for pair in estimators]\n",
    "        self.cv = cv\n",
    "        self.clf = LogisticRegression()\n",
    "\n",
    "    def _stack(self, x):\n",
    "        \"\"\"Return the base-model predictions for x, one column per base estimator.\"\"\"\n",
    "        return np.array([est.predict(x) for est in self.estimators]).T\n",
    "\n",
    "    def fit(self, train_x, train_y):\n",
    "        \"\"\"Fit every base estimator on (train_x, train_y), then fit the meta-classifier.\"\"\"\n",
    "        meta_x = None\n",
    "        if self.cv is not None:\n",
    "            # Out-of-fold predictions: the meta-classifier sees what the base models\n",
    "            # predict for rows held out of their training folds, not memorised rows.\n",
    "            meta_x = np.array([cross_val_predict(est, train_x, train_y, cv=self.cv)\n",
    "                               for est in self.estimators]).T\n",
    "        # The base models used at predict time are always fitted on the full data.\n",
    "        for est in self.estimators:\n",
    "            est.fit(train_x, train_y)\n",
    "        if meta_x is None:\n",
    "            meta_x = self._stack(train_x)\n",
    "        self.clf.fit(meta_x, train_y)\n",
    "\n",
    "    def predict(self, x):\n",
    "        \"\"\"Predict labels for x by feeding the base predictions to the meta-classifier.\"\"\"\n",
    "        return self.clf.predict(self._stack(x))\n",
    "\n",
    "    def score(self, x, y):\n",
    "        \"\"\"Return the precision (precision_score, not accuracy) of predict(x) against y.\"\"\"\n",
    "        return precision_score(y, self.predict(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fresh, unfitted base models for the stacked ensemble. Every estimator handed to\n",
    "# Bagging is created in this cell, so the ensemble does not depend on model objects\n",
    "# left over from the model-comparison cell.\n",
    "lr = LogisticRegression()\n",
    "rf = RandomForestClassifier(n_estimators=300,min_samples_leaf=4,class_weight={0:0.745,1:0.255})\n",
    "gbdt = GradientBoostingClassifier(n_estimators=500,learning_rate=0.03,max_depth=3)\n",
    "xgbGBDT = XGBClassifier(max_depth=3, n_estimators=500, learning_rate=0.03)\n",
    "\n",
    "# xgbGBDT is the XGBoost model configured for the ensemble; passing the older `xgb`\n",
    "# object here would leave xgbGBDT unused and tie this cell to an earlier one.\n",
    "bag = Bagging([('xgb', xgbGBDT), ('lr', lr), ('gbdt', gbdt), ('rf', rf)])\n",
    "\n",
    "from sklearn.metrics import precision_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "88.459"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Repeated random 80/20 hold-out evaluation of the stacked ensemble.\n",
    "# Bagging.score returns precision_score, so the value displayed is the mean\n",
    "# hold-out precision in percent (each run rounded to two decimals first).\n",
    "num_test = 0.20\n",
    "n_repeats = 10\n",
    "score = 0\n",
    "for repeat in range(n_repeats):\n",
    "    X_train, X_cv, Y_train, Y_cv = train_test_split(X_all.values, Y_all.values, test_size=num_test)\n",
    "    bag.fit(X_train, Y_train)\n",
    "    holdout_pct = round(bag.score(X_cv, Y_cv) * 100, 2)\n",
    "    score += holdout_pct\n",
    "score / n_repeats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Refit the ensemble on every labelled row, predict the unlabelled rows and write\n",
    "# the PassengerId / Survived pairs to submission.csv.\n",
    "bag.fit(X_all.values, Y_all.values)\n",
    "Y_test = bag.predict(X_test.values).astype(int)\n",
    "submission = test_df[[\"PassengerId\"]].assign(Survived=Y_test)\n",
    "submission.to_csv('submission.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
